library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(stringr)
library(ggplot2)
library(reshape2)
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
# Directory holding the CSV files with the run results.
path_to_csvs <- "~/Google_Drive/Simplification-for-generalization/simplification-data"

# Names of all CSV files in that directory. The pattern is anchored with "$"
# so that only names ending in ".csv" match (an unanchored "\\.csv" would also
# pick up names such as "runs.csv.bak").
all_csvs <- list.files(path = path_to_csvs, pattern = "\\.csv$")

# Fail early with a clear message instead of erroring later on missing columns.
if (length(all_csvs) == 0) {
  stop("No CSV files found in ", path_to_csvs, call. = FALSE)
}

# Read every CSV, tag its rows with the problem name taken from the file name,
# and stack everything into a single data frame in one bind_rows() call.
all_runs <- bind_rows(lapply(all_csvs, function(filename) {
  run_df <- read.csv(file.path(path_to_csvs, filename),
                     stringsAsFactors = FALSE)
  colnames(run_df) <- str_trim(colnames(run_df))

  # Split the file name on "." and "-" and keep tokens 3 .. (n - 1), i.e. drop
  # the first two tokens and the trailing "csv" extension. This is the same
  # selection the former `s[4:length(s)-1]` made, since that expression parses
  # as `(4:length(s)) - 1`.
  tokens <- unlist(str_split(filename, "[.-]"))
  tokens <- tokens[3:(length(tokens) - 1)]
  run_df$problem <- str_c(tokens, collapse = "-")

  run_df
}))
# Alternative palette, kept commented out for reference:
# cbbPalette <- c("#E69F00", "#56B4E9", "#000000", "#009E73", "#D55E00", "#CC79A7", "#F0E442", "#0072B2")

# Fill colours passed to scale_fill_manual() in the bar charts.
cbbPalette <- c(
  "#009E73",
  "#E69F00",
  "#56B4E9",
  "#F0E442",
  "#000000",
  "#D55E00",
  "#0072B2"
)
Nic's plot
# Baseline rows from the unsimplified programs, keyed by problem, log and
# method: the original program size and whether test error was zero (genPre).
unsimplified_info <- all_runs %>%
  filter(type == "unsimplified") %>%
  mutate(genPre = testError == 0,
         originalSize = programSize) %>%
  select(problem, log, method, originalSize, genPre)

# Simplified programs matched to their baseline row. `quad` combines the
# before/after zero-test-error flags as "<genPre>_<genPost>".
nic_plot_data <- all_runs %>%
  filter(type == "simplified") %>%
  mutate(genPost = testError == 0) %>%
  mutate(simplifiedSize = programSize) %>%
  inner_join(unsimplified_info, by = c("problem", "log", "method")) %>%
  unite(quad, genPre, genPost, sep = "_") %>%
  select(problem, method, originalSize, simplifiedSize, quad)

# Original vs. simplified size, one panel per (quad, method), with the
# identity line in grey and a dashed linear fit in black.
ggplot(nic_plot_data, aes(x = originalSize, y = simplifiedSize)) +
  geom_point(aes(colour = problem), shape = 4, size = 0.3, alpha = 0.3) +
  geom_abline(intercept = 0, slope = 1, colour = "grey") +
  geom_smooth(method = "lm",
              se = FALSE,
              colour = "black",
              linetype = 2,
              size = 0.5) +
  facet_grid(quad ~ method) +
  theme_bw() +
  # Draw legend keys fully opaque even though the points are translucent.
  guides(colour = guide_legend(override.aes = list(alpha = 1)))

Nic's plot rotated
# Baseline rows from the unsimplified programs, keyed by problem, log and
# method: the original program size and whether test error was zero (genPre).
unsimplified_info <- all_runs %>%
  filter(type == "unsimplified") %>%
  mutate(genPre = testError == 0,
         originalSize = programSize) %>%
  select(problem, log, method, originalSize, genPre)

# Simplified programs matched to their baseline row. `quad` combines the
# before/after zero-test-error flags as "<genPre>_<genPost>".
rotated_plot_data <- all_runs %>%
  filter(type == "simplified") %>%
  mutate(genPost = testError == 0) %>%
  mutate(simplifiedSize = programSize) %>%
  inner_join(unsimplified_info, by = c("problem", "log", "method")) %>%
  unite(quad, genPre, genPost, sep = "_") %>%
  select(problem, method, originalSize, simplifiedSize, quad)

# Same scatter as the previous figure, but faceted by problem (columns) with
# free axis scales, and coloured by simplification method.
ggplot(rotated_plot_data, aes(x = originalSize, y = simplifiedSize)) +
  geom_point(aes(colour = method), shape = 4, size = 0.5, alpha = 0.3) +
  geom_abline(intercept = 0, slope = 1, colour = "grey") +
  geom_smooth(method = "lm",
              se = FALSE,
              colour = "black",
              linetype = 2,
              size = 0.5) +
  facet_grid(quad ~ problem, scales = "free") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 15, hjust = 1)) +
  # Draw legend keys fully opaque even though the points are translucent.
  guides(colour = guide_legend(override.aes = list(alpha = 1)))

Problem vs. Average Program Size by Method
Table
# Mean simplified program size (rounded to 3 decimals): one row per problem,
# one column per simplification method.
simp_runs <- all_runs %>%
  filter(type == "simplified") %>%
  group_by(method, problem) %>%
  summarise(avgProgramSize = round(mean(programSize), digits = 3)) %>%
  spread(key = method, value = avgProgramSize)

# Mean unsimplified program size per problem, stored in column `none`.
unsimplified_sizes <- all_runs %>%
  filter(type == "unsimplified") %>%
  group_by(problem) %>%
  summarise(none = round(mean(programSize), digits = 3))

# Final table: `none` followed by the per-method columns, matched on problem.
prog_size_table <- inner_join(unsimplified_sizes, simp_runs, by = "problem")

write.csv(prog_size_table, file = "prog_size_table.csv")
Plot
# Long format of prog_size_table: one row per (problem, variable), where
# `variable` is "none" or a simplification method and `size` is the mean size.
# Built once and reused for both the reference rows and the bars.
long_sizes <- melt(prog_size_table,
                   id.vars = c("problem"),
                   measure.vars = c("none", "Genome", "GenomeBacktracking",
                                    "GenomeBacktrackingNoop", "GenomeNoop",
                                    "Program"),
                   value.name = "size")

# Reference rows: the unsimplified ("none") size for every problem.
horiz_lines_df <- long_sizes %>%
  filter(variable == 'none')

# Horizontal extent of each problem's band on the discrete x axis. ggplot2
# places the sorted discrete values at integer positions 1..n, so the position
# is derived from the problem names themselves rather than from a hard-coded
# seq(0.55, 23.55, 1), which only worked for exactly 24 problems listed in
# axis order.
axis_pos <- as.integer(factor(horiz_lines_df$problem))
horiz_lines_df$x1 <- axis_pos - 0.45
horiz_lines_df$x2 <- axis_pos + 0.45

long_sizes %>%
  filter(variable != 'none') %>%
  ggplot(aes(x = problem,
             y = size,
             fill = variable)) +
  # NOTE(review): this first bar layer is later covered by the identical layer
  # below; presumably it is here so the discrete x scale exists before the
  # numeric xmin/xmax of geom_rect() are added. Confirm before removing.
  geom_bar(stat = "identity",
           position = "dodge") +
  # Light grey band from 0 up to the unsimplified size of each problem.
  geom_rect(data = horiz_lines_df,
            aes(xmin = x1,
                xmax = x2,
                ymin = 0,
                ymax = size),
            fill = '#E5E5E5') +
  # Thick line marking the unsimplified size itself.
  geom_segment(data = horiz_lines_df,
               aes(x = x1,
                   y = size,
                   xend = x2,
                   yend = size),
               size = 1.5) +
  # Per-method bars drawn on top of the band and the line.
  geom_bar(stat = "identity",
           position = "dodge") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 15, hjust = 1),
        legend.position = "top") +
  labs(x = "Problem",
       y = "Program Size") +
  scale_fill_manual(values = cbbPalette,
                    name = 'Method')

Problem Vs Percent Generalized by Method
Table
# Fraction of simplified programs with zero test error: one row per problem,
# one column per simplification method.
simp_runs <- all_runs %>%
  filter(type == "simplified") %>%
  group_by(method, problem) %>%
  summarise(percentGeneralized = mean(as.numeric(testError == 0))) %>%
  spread(key = method, value = percentGeneralized)

# Fraction of unsimplified programs with zero test error, per problem, stored
# in column `none`.
unsimplified_rates <- all_runs %>%
  filter(type == "unsimplified") %>%
  group_by(problem) %>%
  summarise(none = mean(as.numeric(testError == 0)))

# Final table: `none` followed by the per-method columns, matched on problem.
perct_generalized_table <- inner_join(unsimplified_rates, simp_runs,
                                      by = "problem")

write.csv(perct_generalized_table, file = "perct_generalized_table.csv")
Plot
# Long format of perct_generalized_table: one row per (problem, variable),
# where `variable` is "none" or a simplification method and `pct_gener` is the
# fraction of programs with zero test error. Built once and reused below.
long_rates <- melt(perct_generalized_table,
                   id.vars = c("problem"),
                   measure.vars = c("none", "Genome", "GenomeBacktracking",
                                    "GenomeBacktrackingNoop", "GenomeNoop",
                                    "Program"),
                   value.name = "pct_gener")

# Reference rows: the unsimplified ("none") rate for every problem.
horiz_lines_df <- long_rates %>%
  filter(variable == 'none')

# Horizontal extent of each problem's band on the discrete x axis. ggplot2
# places the sorted discrete values at integer positions 1..n, so the position
# is derived from the problem names themselves rather than from a hard-coded
# seq(0.55, 23.55, 1), which only worked for exactly 24 problems listed in
# axis order.
axis_pos <- as.integer(factor(horiz_lines_df$problem))
horiz_lines_df$x1 <- axis_pos - 0.45
horiz_lines_df$x2 <- axis_pos + 0.45

long_rates %>%
  filter(variable != 'none') %>%
  ggplot(aes(x = problem,
             y = pct_gener,
             fill = variable)) +
  # NOTE(review): this first bar layer is later covered by the identical layer
  # below; presumably it is here so the discrete x scale exists before the
  # numeric xmin/xmax of geom_rect() are added. Confirm before removing.
  geom_bar(stat = "identity",
           position = "dodge") +
  # Light grey band from 0 up to the unsimplified rate of each problem.
  geom_rect(data = horiz_lines_df,
            aes(xmin = x1,
                xmax = x2,
                ymin = 0,
                ymax = pct_gener),
            fill = '#E5E5E5') +
  # Per-method bars drawn on top of the band.
  geom_bar(stat = "identity",
           position = "dodge") +
  # Thick line marking the unsimplified rate, drawn last so it stays visible.
  geom_segment(data = horiz_lines_df,
               aes(x = x1,
                   y = pct_gener,
                   xend = x2,
                   yend = pct_gener),
               size = 1.5) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 15, hjust = 1),
        legend.position = "top") +
  labs(x = "Problem",
       y = "% Generalized") +
  scale_fill_manual(values = cbbPalette,
                    name = 'Method')

Simplified Size vs Percent Generalized
Plot a
# Distinct (log, problem) pairs whose unsimplified program had zero test error.
unsimplified_that_generalize <- all_runs %>%
  filter(type == "unsimplified", testError == 0) %>%
  distinct(log, problem)

# For simplified programs belonging to those pairs: fraction with zero test
# error at each (simplified size, problem) combination.
rate_by_size_gen <- all_runs %>%
  filter(type == "simplified") %>%
  inner_join(unsimplified_that_generalize, by = c("log", "problem")) %>%
  group_by(programSize, problem) %>%
  summarise(percentGeneralized = mean(as.numeric(testError == 0)))

ggplot(rate_by_size_gen, aes(x = programSize, y = percentGeneralized)) +
  geom_point(alpha = 0.5, size = 1) +
  #facet_wrap(~ problem) +
  theme_bw() +
  labs(title = "Unsimplified Programs that Generalized",
       x = "Post Simplification Program Size",
       y = "% Generalized")

Plot b
# Distinct (log, problem) pairs whose unsimplified program had a positive
# test error.
unsimplified_that_dont_generalize <- all_runs %>%
  filter(type == "unsimplified",
         testError > 0) %>%
  select(log, problem) %>%
  distinct()

# For simplified programs belonging to those pairs: fraction with zero test
# error at each (simplified size, problem) combination.
all_runs %>%
  inner_join(unsimplified_that_dont_generalize, by = c("log", "problem")) %>%
  filter(type == "simplified") %>%
  mutate(generalized = ifelse(testError == 0, 1, 0)) %>%
  group_by(programSize, problem) %>%
  summarise(percentGeneralized = mean(generalized)) %>%
  ggplot(aes(x = programSize,
             y = percentGeneralized)) +
  geom_point(size = 1,
             alpha = 0.5) +
  #facet_wrap(~ problem) +
  theme_bw() +
  # Title grammar fixed: "Didn't Generalized" -> "Didn't Generalize".
  labs(title = "Unsimplified Programs that Didn't Generalize",
       x = "Post Simplification Program Size",
       y = "% Generalized")

Density of simplified size
Not faceted
# Density of simplified program size, split by whether the simplified program
# has zero test error.
simplified_all <- all_runs %>%
  filter(type == "simplified") %>%
  mutate(generalized = testError == 0)

ggplot(simplified_all,
       aes(x = programSize, colour = generalized, fill = generalized)) +
  geom_density(alpha = 0.5) +
  #facet_wrap(~ problem, scales = "free") +
  theme_bw() +
  labs(x = "Post Simplification Program Size")

Facet where unsimplified program generalized
# Simplified programs whose unsimplified counterpart had zero test error,
# flagged by whether the simplified program also has zero test error.
simplified_from_gen <- all_runs %>%
  filter(type == "simplified") %>%
  inner_join(unsimplified_that_generalize, by = c("log", "problem")) %>%
  mutate(generalized = testError == 0)

# Size density per problem, each panel with its own axis scales.
ggplot(simplified_from_gen,
       aes(x = programSize, colour = generalized, fill = generalized)) +
  geom_density(alpha = 0.5) +
  facet_wrap(~ problem, scales = "free") +
  theme_bw() +
  labs(title = "Unsimplified Programs that Generalized",
       x = "Post Simplification Program Size")

Facet where unsimplified program DIDN’T generalize
# Size density, per problem, of simplified programs whose unsimplified
# counterpart had a positive test error; split by whether the simplified
# program has zero test error.
all_runs %>%
  inner_join(unsimplified_that_dont_generalize, by = c("log", "problem")) %>%
  filter(type == "simplified") %>%
  mutate(generalized = testError == 0) %>%
  ggplot(aes(x = programSize,
             fill = generalized,
             color = generalized)) +
  geom_density(alpha = 0.5) +
  facet_wrap(~ problem, scales = "free") +
  theme_bw() +
  # Title grammar fixed: "Didn't Generalized" -> "Didn't Generalize".
  labs(title = "Unsimplified Programs that Didn't Generalize",
       x = "Post Simplification Program Size")

Density (stat = “count”) of simplified size
Not faceted
# Count (rather than density estimate) of simplified programs at each size,
# split by whether the simplified program has zero test error; log10 y axis.
simplified_counts <- all_runs %>%
  filter(type == "simplified") %>%
  mutate(generalized = testError == 0)

ggplot(simplified_counts,
       aes(x = programSize, colour = generalized, fill = generalized)) +
  geom_density(stat = "count", alpha = 0.5) +
  theme_bw() +
  labs(x = "Post Simplification Program Size") +
  scale_y_log10()

Not faceted A
# Simplified programs whose unsimplified counterpart had zero test error,
# flagged by whether the simplified program also has zero test error.
counts_from_gen <- all_runs %>%
  filter(type == "simplified") %>%
  inner_join(unsimplified_that_generalize, by = c("log", "problem")) %>%
  mutate(generalized = testError == 0)

# Number of programs at each simplified size, all problems pooled.
ggplot(counts_from_gen,
       aes(x = programSize, colour = generalized, fill = generalized)) +
  geom_density(stat = "count", alpha = 0.5) +
  theme_bw() +
  labs(title = "Unsimplified Programs that Generalized",
       x = "Post Simplification Program Size")

Not faceted B
# Number of simplified programs at each size, all problems pooled, restricted
# to runs whose unsimplified program had a positive test error; split by
# whether the simplified program has zero test error.
all_runs %>%
  inner_join(unsimplified_that_dont_generalize, by = c("log", "problem")) %>%
  filter(type == "simplified") %>%
  mutate(generalized = testError == 0) %>%
  ggplot(aes(x = programSize,
             fill = generalized,
             color = generalized)) +
  geom_density(alpha = 0.5,
               stat = "count") +
  theme_bw() +
  # Title grammar fixed: "Didn't Generalized" -> "Didn't Generalize".
  labs(title = "Unsimplified Programs that Didn't Generalize",
       x = "Post Simplification Program Size")

Facet where unsimplified program generalized
# Simplified programs whose unsimplified counterpart had zero test error,
# flagged by whether the simplified program also has zero test error.
facet_counts_from_gen <- all_runs %>%
  filter(type == "simplified") %>%
  inner_join(unsimplified_that_generalize, by = c("log", "problem")) %>%
  mutate(generalized = testError == 0)

# Number of programs at each simplified size, one free-scaled panel per problem.
ggplot(facet_counts_from_gen,
       aes(x = programSize, colour = generalized, fill = generalized)) +
  geom_density(stat = "count", alpha = 0.5) +
  facet_wrap(~ problem, scales = "free") +
  theme_bw() +
  labs(title = "Unsimplified Programs that Generalized",
       x = "Post Simplification Program Size")

Facet where unsimplified program DIDN’T generalize
# Number of simplified programs at each size, one free-scaled panel per
# problem, restricted to runs whose unsimplified program had a positive test
# error; split by whether the simplified program has zero test error.
all_runs %>%
  inner_join(unsimplified_that_dont_generalize, by = c("log", "problem")) %>%
  filter(type == "simplified") %>%
  mutate(generalized = testError == 0) %>%
  ggplot(aes(x = programSize,
             fill = generalized,
             color = generalized)) +
  geom_density(alpha = 0.5,
               stat = "count") +
  facet_wrap(~ problem, scales = "free") +
  theme_bw() +
  # Title grammar fixed: "Didn't Generalized" -> "Didn't Generalize".
  labs(title = "Unsimplified Programs that Didn't Generalize",
       x = "Post Simplification Program Size")
